NewInternet_3 == "Yes, phone" ~ "yes",
TRUE ~ NA_character_
),
internet_job = case_when(
NewInternet_1 == "Yes, job" ~ "yes",
NewInternet_2 == "Yes, job" ~ "yes",
NewInternet_3 == "Yes, job" ~ "yes",
TRUE ~ NA_character_
),
internet_none = case_when(
NewInternet_1 == "No" & is.na(NewInternet_2) & is.na(NewInternet_3) &
is.na(NewInternet_4) & is.na(NewInternet_5)  ~ "none",
NewInternet_2 == "No" & is.na(NewInternet_1) & is.na(NewInternet_3) &
is.na(NewInternet_4) & is.na(NewInternet_5)  ~ "none",
NewInternet_3 == "No" & is.na(NewInternet_2) & is.na(NewInternet_1) &
is.na(NewInternet_4) & is.na(NewInternet_5)  ~ "none",
NewInternet_4 == "No" & is.na(NewInternet_2) & is.na(NewInternet_3) &
is.na(NewInternet_1) & is.na(NewInternet_5)  ~ "none",
TRUE ~ NA_character_
),
internet_dk = case_when(
NewInternet_1 == "'Don't Know/refused" & is.na(NewInternet_2) &
is.na(NewInternet_3) & is.na(NewInternet_4) & is.na(NewInternet_5)  ~ "dk",
NewInternet_2 == "'Don't Know/refused" & is.na(NewInternet_1) &
is.na(NewInternet_3) & is.na(NewInternet_4) & is.na(NewInternet_5)  ~ "dk",
NewInternet_3 == "'Don't Know/refused" & is.na(NewInternet_2) &
is.na(NewInternet_1) & is.na(NewInternet_4) & is.na(NewInternet_5)  ~ "dk",
NewInternet_5 == "'Don't Know/refused" & is.na(NewInternet_2) &
is.na(NewInternet_3) & is.na(NewInternet_1) & is.na(NewInternet_5)  ~ "dk",
TRUE ~ NA_character_
)
) %>%
select(-starts_with("NewInternet_"))
# Minor renaming
# Rename USMIS_TRUMPRACIST to USMUS_TRUMPRACIST.
dat2017 <- rename(dat2017, USMUS_TRUMPRACIST = USMIS_TRUMPRACIST)
# Cell-by-cell comparison with dat2017_orig: the full logical matrix, a tally
# of equal / unequal cells, and the linear positions of the unequal cells.
dat2017 == dat2017_orig
table(dat2017 == dat2017_orig)
which(dat2017 != dat2017_orig)
# Clean 2017 ---------------------------
# Load data
dat2017 <- read_dta("input/ADLraw/ADL 2017 Combined File.dta")
# Manually convert labels and set encoding
# The region names are typed out here. factor() pairs `labels` positionally
# with the sorted distinct values of the column, so the vector has to line up
# with the codes that actually occur -- TODO confirm all 38 codes are present.
dat2017$GERMANY_NUTS2 <-
  factor(dat2017$GERMANY_NUTS2,
         labels = c("Stuttgart", "Karlsruhe", "Freiburg", "Tubingen",
                    "Oberbayern", "Niederbayern", "Oberpfalz", "Oberfranken",
                    "Mittelfranken", "Unterfranken", "Schwaben", "Berlin",
                    "Brandenburg", "Bremen", "Hamburg", "Darmstadt", "Geissen",
                    "Kassel", "Mecklenburg-Vorpommern", "Braunschweig",
                    "Hannover", "Lüneburg", "Weser-ems", "Düsseldorf",
                    "Köln", "Münster", "Detmold", "Arnsberg", "Koblenz",
                    "Trier", "Rheinhessen-Pfalz", "Saarland", "Dresden",
                    "Chemnitz", "Leipzig", "Sachsen-Anhalt",
                    "Schleswig-Holstein", "Thüringen"))
# Change to labels
# wave, id, yearborn, Age and InternationalTotalWeight are made numeric; every
# other column is converted with as_character(). respnumber is a running row
# index, after which id is dropped.
dat2017 <-
  dat2017 %>%
  mutate_at(vars(wave, id, yearborn, Age, InternationalTotalWeight),
            as.numeric) %>%
  mutate_at(vars(-wave, -id, -yearborn, -Age, -InternationalTotalWeight),
            as_character) %>%
  # row_number() yields 1..n for the table being mutated; the previous
  # seq(1:nrow(dat2017)) was redundant and gave c(1, 2) for an empty table.
  mutate(respnumber = row_number()) %>%
  select(-id)
# Country name
# Build an upper-case country label, flag the "USA - MUSLIM" rows before that
# label is recoded to "USA", then drop the two raw country columns.
dat2017 <-
  dat2017 %>%
  mutate(
    country = toupper(as.character(as_factor(COUNTRY_2017))),
    US_MUSLIM_SAMPLE = ifelse(country == "USA - MUSLIM", 1, 0),
    country = recode(
      country,
      UK = "UNITED KINGDOM",
      `USA - MAIN` = "USA",
      `USA - MUSLIM` = "USA"
    )
  ) %>%
  select(-c(COUNTRY_2017, COUNTRY_II))
# Date
# Keep the raw value as the character column `date` and cut year, month and
# day out of it by position. Rows with country "MEXICO" get NA for all three.
# NOTE(review): month is read from the first character only and day from
# characters 2-3, so this presumably expects a 7-character MDDYYYY value --
# confirm that no interview falls in a two-digit month.
dat2017 <-
  dat2017 %>%
  mutate(
    date = as.character(Date),
    year = replace(
      as.numeric(str_sub(Date, start = -4)),
      country %in% "MEXICO",
      NA_real_
    ),
    month = replace(
      as.numeric(str_sub(Date, end = 1)),
      country %in% "MEXICO",
      NA_real_
    ),
    day = replace(
      as.numeric(str_sub(Date, start = 2, end = 3)),
      country %in% "MEXICO",
      NA_real_
    )
  ) %>%
  select(-Date)
# Consolidate ethnicity and language
# Drop a set of ethnicity / race / language columns, then paste the remaining
# country-specific columns into single `ethnicity` and `language` columns.
# Missing pieces are skipped (na.rm = TRUE) and nothing is inserted between
# the pieces (sep = "").
dat2017 <-
  dat2017 %>%
  select(-ETHNICITY_EURO_2017, -ETHNICITY_USA, -USRACE_2017,
         -HISPANIC_2017, -US_RACELIST, -ETHNICITY_MEXICO_OTHER,
         -ETHNICITY_USA_STRING, -LANG_FRANCE, -LANG_GERMANY, -LANG_UK) %>%
  unite(ethnicity, c(CombRace_USAONLY, ETHNICTY_MEXICO,
                     ETHNICITY_UK_NEW, ETHNICITY_FRANCE_NEW,
                     ETHNICITY_GERMANY_NEW), sep = "", na.rm = TRUE) %>%
  # Empty strings are turned into NA so that unite() skips them; the codes
  # "1" and "2" are spelled out as language names.
  mutate(LANG_USA_STRING = na_if(LANG_USA_STRING, "") %>%
           recode(`1` = "ENGLISH", `2` = "ARABIC")) %>%
  unite(language, starts_with("LANG_"), sep = "", na.rm = TRUE)
# Consolidate geo
# Same approach for geography: three united columns (geo1, geo2, geo3) built
# from the country-specific region columns.
dat2017 <-
  dat2017 %>%
  select(-GEO_EURO_2017) %>%
  unite(geo1, c(georecode, GEO_USA, GEO_MEXICO, GEO_UK_NEW,
                GEO_FRANCE_NEW, GEO_GERMANY_NEW),
        sep = "", na.rm = TRUE) %>%
  unite(geo2, c(GEOII_USA, UK_NUTS1, GERMANY_NUTS1, FRANCE_NUTS1),
        sep = "", na.rm = TRUE) %>%
  unite(geo3, c(US_STATE, UK_NUTS2, GERMANY_NUTS2, FRANCE_NUTS2),
        sep = "", na.rm = TRUE)
# religion, age, phone type
# Drop the listed religion, age and phone-type columns in a single select().
dat2017 <-
  dat2017 %>%
  select(
    # religion
    -US_RELIGIONLIST, -USAMUSLIM_RELIGION, -RELIGION_STRINGOTHER,
    # age
    -New_Age, -New_Age50, -ED_Age2, -EDAGE_2017,
    # phone type
    -PHONETYPE_RECODE
  )
# Internet access
# Collapse the multi-response columns NewInternet_1..5 into one indicator per
# access type, then drop the raw columns.
# - internet_home / internet_phone / internet_job are "yes" when any of the
#   first three response slots holds the matching answer.
# - internet_none / internet_dk are set only when that answer is the sole
#   non-missing response across all five slots.
# NOTE(review): "none" is looked for in slots 1-4 and "dk" in slots 1, 2, 3
# and 5 -- confirm that this asymmetry is intended.
dat2017 <-
  dat2017 %>%
  mutate(
    internet_home = case_when(
      NewInternet_1 == "Yes, home" ~ "yes",
      NewInternet_2 == "Yes, home" ~ "yes",
      NewInternet_3 == "Yes, home" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_phone = case_when(
      NewInternet_1 == "Yes, phone" ~ "yes",
      NewInternet_2 == "Yes, phone" ~ "yes",
      NewInternet_3 == "Yes, phone" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_job = case_when(
      NewInternet_1 == "Yes, job" ~ "yes",
      NewInternet_2 == "Yes, job" ~ "yes",
      NewInternet_3 == "Yes, job" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_none = case_when(
      NewInternet_1 == "No" & is.na(NewInternet_2) & is.na(NewInternet_3) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_2 == "No" & is.na(NewInternet_1) & is.na(NewInternet_3) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_3 == "No" & is.na(NewInternet_1) & is.na(NewInternet_2) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_4 == "No" & is.na(NewInternet_1) & is.na(NewInternet_2) &
        is.na(NewInternet_3) & is.na(NewInternet_5) ~ "none",
      TRUE ~ NA_character_
    ),
    internet_dk = case_when(
      NewInternet_1 == "'Don't Know/refused" & is.na(NewInternet_2) &
        is.na(NewInternet_3) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      NewInternet_2 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_3) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      NewInternet_3 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_2) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      # The other four slots must be empty. This branch previously tested
      # is.na(NewInternet_5), which can never hold together with
      # NewInternet_5 == "...", so it never matched.
      NewInternet_5 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_2) & is.na(NewInternet_3) &
        is.na(NewInternet_4) ~ "dk",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-starts_with("NewInternet_"))
# Minor renaming
# Rename USMIS_TRUMPRACIST to USMUS_TRUMPRACIST.
dat2017 <- rename(dat2017, USMUS_TRUMPRACIST = USMIS_TRUMPRACIST)
# Keep a copy of this version of the cleaned data under its own name.
dat2017_bjn <- dat2017
# Clean 2017 ---------------------------
# Load data
dat2017 <- read_dta("input/ADLraw/ADL 2017 Combined File.dta")
# Change to labels
# wave, id, yearborn, Age and InternationalTotalWeight are made numeric; every
# other column is converted with as_character(). respnumber is a running row
# index, after which id is dropped.
dat2017 <-
  dat2017 %>%
  mutate_at(vars(wave, id, yearborn, Age, InternationalTotalWeight),
            as.numeric) %>%
  mutate_at(vars(-wave, -id, -yearborn, -Age, -InternationalTotalWeight),
            as_character) %>%
  # row_number() yields 1..n for the table being mutated; the previous
  # seq(1:nrow(dat2017)) was redundant and gave c(1, 2) for an empty table.
  mutate(respnumber = row_number()) %>%
  select(-id)
# Country name
# Build an upper-case country label, flag the "USA - MUSLIM" rows before that
# label is recoded to "USA", then drop the two raw country columns.
dat2017 <-
  dat2017 %>%
  mutate(
    country = toupper(as.character(as_factor(COUNTRY_2017))),
    US_MUSLIM_SAMPLE = ifelse(country == "USA - MUSLIM", 1, 0),
    country = recode(
      country,
      UK = "UNITED KINGDOM",
      `USA - MAIN` = "USA",
      `USA - MUSLIM` = "USA"
    )
  ) %>%
  select(-c(COUNTRY_2017, COUNTRY_II))
# Date
# Keep the raw value as the character column `date` and cut year, month and
# day out of it by position. Rows with country "MEXICO" get NA for all three.
# NOTE(review): month is read from the first character only and day from
# characters 2-3, so this presumably expects a 7-character MDDYYYY value --
# confirm that no interview falls in a two-digit month.
dat2017 <-
  dat2017 %>%
  mutate(
    date = as.character(Date),
    year = replace(
      as.numeric(str_sub(Date, start = -4)),
      country %in% "MEXICO",
      NA_real_
    ),
    month = replace(
      as.numeric(str_sub(Date, end = 1)),
      country %in% "MEXICO",
      NA_real_
    ),
    day = replace(
      as.numeric(str_sub(Date, start = 2, end = 3)),
      country %in% "MEXICO",
      NA_real_
    )
  ) %>%
  select(-Date)
# Consolidate ethnicity and language
# Drop a set of ethnicity / race / language columns, then paste the remaining
# country-specific columns into single `ethnicity` and `language` columns.
# Missing pieces are skipped (na.rm = TRUE) and nothing is inserted between
# the pieces (sep = "").
dat2017 <-
  dat2017 %>%
  select(-ETHNICITY_EURO_2017, -ETHNICITY_USA, -USRACE_2017,
         -HISPANIC_2017, -US_RACELIST, -ETHNICITY_MEXICO_OTHER,
         -ETHNICITY_USA_STRING, -LANG_FRANCE, -LANG_GERMANY, -LANG_UK) %>%
  unite(ethnicity, c(CombRace_USAONLY, ETHNICTY_MEXICO,
                     ETHNICITY_UK_NEW, ETHNICITY_FRANCE_NEW,
                     ETHNICITY_GERMANY_NEW), sep = "", na.rm = TRUE) %>%
  # Empty strings are turned into NA so that unite() skips them; the codes
  # "1" and "2" are spelled out as language names.
  mutate(LANG_USA_STRING = na_if(LANG_USA_STRING, "") %>%
           recode(`1` = "ENGLISH", `2` = "ARABIC")) %>%
  unite(language, starts_with("LANG_"), sep = "", na.rm = TRUE)
# Consolidate geo
# Same approach for geography: three united columns (geo1, geo2, geo3) built
# from the country-specific region columns.
dat2017 <-
  dat2017 %>%
  select(-GEO_EURO_2017) %>%
  unite(geo1, c(georecode, GEO_USA, GEO_MEXICO, GEO_UK_NEW,
                GEO_FRANCE_NEW, GEO_GERMANY_NEW),
        sep = "", na.rm = TRUE) %>%
  unite(geo2, c(GEOII_USA, UK_NUTS1, GERMANY_NUTS1, FRANCE_NUTS1),
        sep = "", na.rm = TRUE) %>%
  unite(geo3, c(US_STATE, UK_NUTS2, GERMANY_NUTS2, FRANCE_NUTS2),
        sep = "", na.rm = TRUE)
# religion, age, phone type
# Drop the listed religion, age and phone-type columns in a single select().
dat2017 <-
  dat2017 %>%
  select(
    # religion
    -US_RELIGIONLIST, -USAMUSLIM_RELIGION, -RELIGION_STRINGOTHER,
    # age
    -New_Age, -New_Age50, -ED_Age2, -EDAGE_2017,
    # phone type
    -PHONETYPE_RECODE
  )
# Internet access
# Collapse the multi-response columns NewInternet_1..5 into one indicator per
# access type, then drop the raw columns.
# - internet_home / internet_phone / internet_job are "yes" when any of the
#   first three response slots holds the matching answer.
# - internet_none / internet_dk are set only when that answer is the sole
#   non-missing response across all five slots.
# NOTE(review): "none" is looked for in slots 1-4 and "dk" in slots 1, 2, 3
# and 5 -- confirm that this asymmetry is intended.
dat2017 <-
  dat2017 %>%
  mutate(
    internet_home = case_when(
      NewInternet_1 == "Yes, home" ~ "yes",
      NewInternet_2 == "Yes, home" ~ "yes",
      NewInternet_3 == "Yes, home" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_phone = case_when(
      NewInternet_1 == "Yes, phone" ~ "yes",
      NewInternet_2 == "Yes, phone" ~ "yes",
      NewInternet_3 == "Yes, phone" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_job = case_when(
      NewInternet_1 == "Yes, job" ~ "yes",
      NewInternet_2 == "Yes, job" ~ "yes",
      NewInternet_3 == "Yes, job" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_none = case_when(
      NewInternet_1 == "No" & is.na(NewInternet_2) & is.na(NewInternet_3) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_2 == "No" & is.na(NewInternet_1) & is.na(NewInternet_3) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_3 == "No" & is.na(NewInternet_1) & is.na(NewInternet_2) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_4 == "No" & is.na(NewInternet_1) & is.na(NewInternet_2) &
        is.na(NewInternet_3) & is.na(NewInternet_5) ~ "none",
      TRUE ~ NA_character_
    ),
    internet_dk = case_when(
      NewInternet_1 == "'Don't Know/refused" & is.na(NewInternet_2) &
        is.na(NewInternet_3) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      NewInternet_2 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_3) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      NewInternet_3 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_2) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      # The other four slots must be empty. This branch previously tested
      # is.na(NewInternet_5), which can never hold together with
      # NewInternet_5 == "...", so it never matched.
      NewInternet_5 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_2) & is.na(NewInternet_3) &
        is.na(NewInternet_4) ~ "dk",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-starts_with("NewInternet_"))
# Minor renaming
# Rename USMIS_TRUMPRACIST to USMUS_TRUMPRACIST.
dat2017 <- rename(dat2017, USMUS_TRUMPRACIST = USMIS_TRUMPRACIST)
# Compare the current dat2017 with the dat2017_bjn snapshot -------------
# Tally of equal / unequal cells and the linear positions that differ.
table(dat2017 == dat2017_bjn)
which(dat2017 != dat2017_bjn)
dat2017
head(dat2017$direction)
head(which(dat2017 == "Wrong direction"))
# Spot-check one row and one column. na.omit() drops comparisons that are NA
# so that any() is not turned into NA by missing cells.
# (The index used to be called `c`, which masked base::c and was mistaken for
# the loop variable further down.)
chk <- 1
any(dat2017[chk, ] != dat2017_bjn[chk, ])
dat2017[chk, ] != dat2017_bjn[chk, ]
any(na.omit(dat2017[, chk] != dat2017_bjn[, chk]))
na.omit(dat2017[, chk] != dat2017_bjn[, chk])
# Flag (with 1) every column that has at least one non-missing difference
# between the two versions. The loop indexes with `i`; earlier attempts looped
# over an undefined `dat` and compared the same fixed column every time.
check_cols <- numeric(ncol(dat2017))
for (i in seq_len(ncol(dat2017))) {
  if (any(na.omit(dat2017[, i] != dat2017_bjn[, i]))) {
    check_cols[i] <- 1L
  }
}
check_cols
which(check_cols == 1)
# Look at column 107 and row 1027 in both versions.
# NOTE(review): these indices are presumably read off the output above --
# confirm they still apply if the input file changes.
dat2017[, 107]
which(dat2017[, 107] != dat2017_bjn[, 107])
dat2017[1027, 107]
dat2017_bjn[1027, 107]
# NOTE(review): if the geo consolidation above has run, GERMANY_NUTS2 has
# already been merged into geo3 by unite() and is no longer a column here.
dat2017$GERMANY_NUTS2
# Clean 2017 ---------------------------
# Load data
dat2017 <- read_dta("input/ADLraw/ADL 2017 Combined File.dta")
# Manually convert labels and set encoding
# The region names are typed out by hand and attached as factor labels.
dat2017[["GERMANY_NUTS2"]] <-
  factor(
    dat2017[["GERMANY_NUTS2"]],
    labels = c(
      "Stuttgart", "Karlsruhe", "Freiburg", "Tubingen",
      "Oberbayern", "Niederbayern", "Oberpfalz", "Oberfranken",
      "Mittelfranken", "Unterfranken", "Schwaben", "Berlin",
      "Brandenburg", "Bremen", "Hamburg", "Darmstadt", "Geissen",
      "Kassel", "Mecklenburg-Vorpommern", "Braunschweig",
      "Hannover", "Lüneburg", "Weser-ems", "Düsseldorf",
      "Köln", "Münster", "Detmold", "Arnsberg", "Koblenz",
      "Trier", "Rheinhessen-Pfalz", "Saarland", "Dresden",
      "Chemnitz", "Leipzig", "Sachsen-Anhalt",
      "Schleswig-Holstein", "Thüringen"
    )
  )
# Show the recoded column and its frequency table (displayed twice, as in the
# original session).
dat2017[["GERMANY_NUTS2"]]
table(dat2017[["GERMANY_NUTS2"]])
dat2017[["GERMANY_NUTS2"]]
table(dat2017[["GERMANY_NUTS2"]])
# Clean 2017 ---------------------------
# Load data
dat2017 <- read_dta("input/ADLraw/ADL 2017 Combined File.dta")
# Change to labels
# wave, id, yearborn, Age and InternationalTotalWeight are made numeric; every
# other column is converted with as_character(). respnumber is a running row
# index, after which id is dropped.
dat2017 <-
  dat2017 %>%
  mutate_at(vars(wave, id, yearborn, Age, InternationalTotalWeight),
            as.numeric) %>%
  mutate_at(vars(-wave, -id, -yearborn, -Age, -InternationalTotalWeight),
            as_character) %>%
  # row_number() yields 1..n for the table being mutated; the previous
  # seq(1:nrow(dat2017)) was redundant and gave c(1, 2) for an empty table.
  mutate(respnumber = row_number()) %>%
  select(-id)
# Country name
# Build an upper-case country label, flag the "USA - MUSLIM" rows before that
# label is recoded to "USA", then drop the two raw country columns.
dat2017 <-
  dat2017 %>%
  mutate(
    country = toupper(as.character(as_factor(COUNTRY_2017))),
    US_MUSLIM_SAMPLE = ifelse(country == "USA - MUSLIM", 1, 0),
    country = recode(
      country,
      UK = "UNITED KINGDOM",
      `USA - MAIN` = "USA",
      `USA - MUSLIM` = "USA"
    )
  ) %>%
  select(-c(COUNTRY_2017, COUNTRY_II))
# Date
# Keep the raw value as the character column `date` and cut year, month and
# day out of it by position. Rows with country "MEXICO" get NA for all three.
# NOTE(review): month is read from the first character only and day from
# characters 2-3, so this presumably expects a 7-character MDDYYYY value --
# confirm that no interview falls in a two-digit month.
dat2017 <-
  dat2017 %>%
  mutate(
    date = as.character(Date),
    year = replace(
      as.numeric(str_sub(Date, start = -4)),
      country %in% "MEXICO",
      NA_real_
    ),
    month = replace(
      as.numeric(str_sub(Date, end = 1)),
      country %in% "MEXICO",
      NA_real_
    ),
    day = replace(
      as.numeric(str_sub(Date, start = 2, end = 3)),
      country %in% "MEXICO",
      NA_real_
    )
  ) %>%
  select(-Date)
# Consolidate ethnicity and language
# Drop a set of ethnicity / race / language columns, then paste the remaining
# country-specific columns into single `ethnicity` and `language` columns.
# Missing pieces are skipped (na.rm = TRUE) and nothing is inserted between
# the pieces (sep = "").
dat2017 <-
  dat2017 %>%
  select(-ETHNICITY_EURO_2017, -ETHNICITY_USA, -USRACE_2017,
         -HISPANIC_2017, -US_RACELIST, -ETHNICITY_MEXICO_OTHER,
         -ETHNICITY_USA_STRING, -LANG_FRANCE, -LANG_GERMANY, -LANG_UK) %>%
  unite(ethnicity, c(CombRace_USAONLY, ETHNICTY_MEXICO,
                     ETHNICITY_UK_NEW, ETHNICITY_FRANCE_NEW,
                     ETHNICITY_GERMANY_NEW), sep = "", na.rm = TRUE) %>%
  # Empty strings are turned into NA so that unite() skips them; the codes
  # "1" and "2" are spelled out as language names.
  mutate(LANG_USA_STRING = na_if(LANG_USA_STRING, "") %>%
           recode(`1` = "ENGLISH", `2` = "ARABIC")) %>%
  unite(language, starts_with("LANG_"), sep = "", na.rm = TRUE)
# Consolidate geo
# Same approach for geography: three united columns (geo1, geo2, geo3) built
# from the country-specific region columns.
dat2017 <-
  dat2017 %>%
  select(-GEO_EURO_2017) %>%
  unite(geo1, c(georecode, GEO_USA, GEO_MEXICO, GEO_UK_NEW,
                GEO_FRANCE_NEW, GEO_GERMANY_NEW),
        sep = "", na.rm = TRUE) %>%
  unite(geo2, c(GEOII_USA, UK_NUTS1, GERMANY_NUTS1, FRANCE_NUTS1),
        sep = "", na.rm = TRUE) %>%
  unite(geo3, c(US_STATE, UK_NUTS2, GERMANY_NUTS2, FRANCE_NUTS2),
        sep = "", na.rm = TRUE)
# religion, age, phone type
# Drop the listed religion, age and phone-type columns in a single select().
dat2017 <-
  dat2017 %>%
  select(
    # religion
    -US_RELIGIONLIST, -USAMUSLIM_RELIGION, -RELIGION_STRINGOTHER,
    # age
    -New_Age, -New_Age50, -ED_Age2, -EDAGE_2017,
    # phone type
    -PHONETYPE_RECODE
  )
# Internet access
# Collapse the multi-response columns NewInternet_1..5 into one indicator per
# access type, then drop the raw columns.
# - internet_home / internet_phone / internet_job are "yes" when any of the
#   first three response slots holds the matching answer.
# - internet_none / internet_dk are set only when that answer is the sole
#   non-missing response across all five slots.
# NOTE(review): "none" is looked for in slots 1-4 and "dk" in slots 1, 2, 3
# and 5 -- confirm that this asymmetry is intended.
dat2017 <-
  dat2017 %>%
  mutate(
    internet_home = case_when(
      NewInternet_1 == "Yes, home" ~ "yes",
      NewInternet_2 == "Yes, home" ~ "yes",
      NewInternet_3 == "Yes, home" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_phone = case_when(
      NewInternet_1 == "Yes, phone" ~ "yes",
      NewInternet_2 == "Yes, phone" ~ "yes",
      NewInternet_3 == "Yes, phone" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_job = case_when(
      NewInternet_1 == "Yes, job" ~ "yes",
      NewInternet_2 == "Yes, job" ~ "yes",
      NewInternet_3 == "Yes, job" ~ "yes",
      TRUE ~ NA_character_
    ),
    internet_none = case_when(
      NewInternet_1 == "No" & is.na(NewInternet_2) & is.na(NewInternet_3) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_2 == "No" & is.na(NewInternet_1) & is.na(NewInternet_3) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_3 == "No" & is.na(NewInternet_1) & is.na(NewInternet_2) &
        is.na(NewInternet_4) & is.na(NewInternet_5) ~ "none",
      NewInternet_4 == "No" & is.na(NewInternet_1) & is.na(NewInternet_2) &
        is.na(NewInternet_3) & is.na(NewInternet_5) ~ "none",
      TRUE ~ NA_character_
    ),
    internet_dk = case_when(
      NewInternet_1 == "'Don't Know/refused" & is.na(NewInternet_2) &
        is.na(NewInternet_3) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      NewInternet_2 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_3) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      NewInternet_3 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_2) & is.na(NewInternet_4) &
        is.na(NewInternet_5) ~ "dk",
      # The other four slots must be empty. This branch previously tested
      # is.na(NewInternet_5), which can never hold together with
      # NewInternet_5 == "...", so it never matched.
      NewInternet_5 == "'Don't Know/refused" & is.na(NewInternet_1) &
        is.na(NewInternet_2) & is.na(NewInternet_3) &
        is.na(NewInternet_4) ~ "dk",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-starts_with("NewInternet_"))
# Minor renaming
# Rename USMIS_TRUMPRACIST to USMUS_TRUMPRACIST.
dat2017 <- rename(dat2017, USMUS_TRUMPRACIST = USMIS_TRUMPRACIST)
# Keep a copy of this version of the cleaned data under its own name.
dat2017_orig <- dat2017
# Columns flagged as differing between the two versions.
which(check_cols == 1)
# Put column 107 of both versions side by side and list the rows (and then
# the distinct value pairs) where they disagree.
# The earlier call to col_binds() was a typo for bind_cols() and has been
# removed because no such function exists.
bind_cols(dat2017_orig[, 107], dat2017_bjn[, 107])
test <- bind_cols(dat2017_orig[, 107], dat2017_bjn[, 107])
names(test) <- c("c1", "c2")
test %>% filter(c1 != c2)
test %>% filter(c1 != c2) %>% unique()
